
# Name of this Scrapy project.
BOT_NAME = "scrapyDemo"

# Packages searched for spider classes, and the package that receives
# newly generated spiders.
SPIDER_MODULES = ["scrapyDemo.spiders"]
NEWSPIDER_MODULE = "scrapyDemo.spiders"

# Do not honour robots.txt; the original author notes that many targets
# cannot be crawled while it is obeyed.
ROBOTSTXT_OBEY = False

# Global cap on the number of requests in flight at the same time.
CONCURRENT_REQUESTS = 30
# Per-domain and per-IP caps share the same value.
# NOTE(review): both exceed the global cap of 30, so the global cap is
# presumably the effective limit; per the Scrapy docs a non-zero per-IP cap
# also takes precedence over the per-domain one -- confirm this is intended.
CONCURRENT_REQUESTS_PER_DOMAIN = CONCURRENT_REQUESTS_PER_IP = 100

# Delay between downloads, in seconds; throttles the crawl rate.
DOWNLOAD_DELAY = 1

# Keep cookie handling on so cookies received during the crawl are retained.
# NOTE(review): Scrapy documents True as the default already, so this line is
# explicit rather than required -- confirm against the Scrapy version in use.
COOKIES_ENABLED = True

# Downloader middlewares, keyed by import path. The value is the ordering
# number; None switches the middleware off.
DOWNLOADER_MIDDLEWARES = {
    # The project's template-generated downloader middleware is disabled.
    "scrapyDemo.middlewares.ScrapydemoDownloaderMiddleware": None,
    # "scrapyDemo.middlewares.MyproxiesSpiderMiddleware": 500,
    # Presumably sets a random User-Agent per request -- see
    # scrapyDemo/random_useragent.py to confirm.
    "scrapyDemo.random_useragent.RandomUserAgentMiddleware": 400,
}

# Item pipelines, keyed by import path. The number is the pipeline's
# priority: pipelines with lower numbers run first. Only one pipeline is
# active at present; uncomment an entry to switch targets.
ITEM_PIPELINES = {
    # Template-generated default pipeline
    # "scrapyDemo.pipelines.ScrapydemoPipeline": 300,

    # Dangdang pipeline
    # "scrapyDemo.pipelines.ScrapydemoMySqlPipeline": 200,

    # Bilibili search pipeline
    # "scrapyDemo.pipelines.ScrapyblibliMySqlPipeline": 200,

    # Bilibili weekly hot-articles ranking
    "scrapyDemo.pipelines.blibliHotList": 255,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
